import numpy as np
import pandas as pd
import jieba
import jieba.analyse
import sys
def format_str(content):
    """Return the items of ``content`` accepted by ``is_chinese``, joined.

    ``content`` is iterated item by item (character by character for a
    string). Items for which ``is_chinese`` returns true are kept in their
    original order and concatenated; everything else is dropped. An input
    with no accepted items yields the empty string.
    """
    return ''.join(ch for ch in content if is_chinese(ch))
def is_chinese(uchar):
    """Return True when ``uchar`` falls within U+4E00..U+9FA5, inclusive.

    The check is an ordinary string comparison against the two bounds, so a
    single character is tested by code point, while a longer string is
    compared lexicographically. The empty string is never in range.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'

def fenci(argv):
    """Segment the Chinese text in ``argv[1]`` and drop stop words.

    ``argv[1]`` is converted to ``str`` and reduced by ``format_str`` before
    being tokenized with ``jieba.cut``. Each token is replaced by its entry
    in the synonym table when one exists (the table is currently empty), and
    tokens listed in ``stop_words.txt`` are discarded.

    Args:
        argv: Argument sequence laid out like ``sys.argv``; the text to
            segment is read from index 1.

    Returns:
        A one-element list whose single string is the kept tokens joined by
        commas. The same list is also printed to stdout.

    Raises:
        IndexError: If ``argv`` is a list with no element at index 1.
        OSError: If ``stop_words.txt`` cannot be opened; the path is relative
            to the current working directory.
    """
    text = format_str(str(argv[1]))
    # jieba.load_userdict(dicpath) would load a custom dictionary here.
    with open('stop_words.txt', 'r', encoding='utf-8') as f:
        # One stop word per line. A set makes each membership test O(1)
        # instead of scanning the whole stop-word list for every token.
        stop_words = set(f.read().split('\n'))

    synonyms = {}  # synonym table: token -> replacement word
    kept = []
    for token in jieba.cut(text):
        # Fall back to the token itself when it has no synonym entry.
        word = synonyms.get(token, token)
        if word not in stop_words:
            kept.append(word)

    res = [','.join(kept)]
    print(res)
    return res
      
    
   
    
   # print ("Length of list using naive method is : " + str(result))
  
# Script entry point: when the file is run directly, hand the raw command-line
# arguments to fenci, which reads the text to segment from argv[1].
if __name__ == '__main__':
    fenci(sys.argv)